pckg <- c("rio", "lubridate", "tidyverse", "plotly")
lapply(pckg, require, character.only=TRUE)
## Warning: package 'tibble' was built under R version 3.6.2
I could not get quantmod to work hence I had to do it manually. The Date column was imported as a character but then changed into a date format. Another column was added called Stock that held the ticker symbol for each stock being imported.
The naming convention[^1] is the name of the stock followed by ‘DF’ [^1]: I am using camel case for naming the rest of objects.
uniqloDF <- rio::import("Stock/FRCOY.csv") %>% filter(Open != 'null') %>% #uniqlo
mutate(Date = as.Date(Date)) %>% mutate(Stock = "FRCOY")
luluDF <- rio::import("Stock/LULU.csv") %>% filter(Open != 'null') %>% #Lululemon
mutate(Date = as.Date(Date)) %>% mutate(Stock = "LULU")
shopDF <- rio::import("Stock/SHOP.csv") %>% filter(Open != 'null') %>% #Shopify
mutate(Date = as.Date(Date)) %>% mutate(Stock = "SHOP")
wmtDF <- rio::import("Stock/WMT.csv") %>% filter(Open != 'null') %>% #Walmart
mutate(Date = as.Date(Date)) %>% mutate(Stock = "WMT")
tgtDF <- rio::import("Stock/TGT.csv") %>% filter(Open != 'null') %>% #Target
mutate(Date = as.Date(Date)) %>% mutate(Stock = "TGT")
costDF <- rio::import("Stock/COST.csv") %>% filter(Open != 'null') %>% #Costco
mutate(Date = as.Date(Date)) %>% mutate(Stock = "COST")
Rows are binded in a nested format, making use of the bind_rows(., df syntax.
#bind rows to create a combined data frame
comDF <- bind_rows(uniqloDF, luluDF) %>% bind_rows(., shopDF) %>%
bind_rows(., wmtDF) %>% bind_rows(., tgtDF) %>% bind_rows(., costDF) %>%
mutate(Stck = as.factor(Stock))
The dataframe for the last day of trading information is extracted by group the data by Year and Date. Then the data is filtered (select observations meeting condition) to extract the rows with the last date of the month. Lastly, the Close column is extracted. The dataframe for first day follows the same process as that of first day. The main differences are that the rows with the lowest date value for each month and the Open column are extracted. The dataframe for total volume/strong> for each month is extracted by using the same grouping formula as above. thereafter each grouping is summarized by finding the total volume for each grouping. The combined monthly data data frame is constructed by joining the three previous data frames by using the inner_join function. The parameters for the join are Year, Month and Stock. Two additional columns are added that calculate the increase in stock price and the percentage increase in stock price.
#last day data
lastDay <- comDF %>% group_by(Year=year(Date), Month=month(Date), Stock) %>%
filter(Date == max(Date)) %>%
select(Close)
## Adding missing grouping variables: `Year`, `Month`, `Stock`
#first day data
firstDay <- comDF %>% group_by(Year=year(Date), Month=month(Date), Stock) %>%
filter(Date == min(Date)) %>%
select(Open)
## Adding missing grouping variables: `Year`, `Month`, `Stock`
#monthly volume data
monthVol <- comDF %>% group_by(Year=year(Date), Month=month(Date), Stock) %>%summarise(TotVolume = sum(Volume))
#combined end of month data
monthData <- inner_join(lastDay, firstDay, by = c( 'Year', 'Month', 'Stock' )) %>%
inner_join(., monthVol, by = c( 'Year', 'Month', 'Stock' )) %>%
mutate(Incr = Close - Open) %>%
mutate(PercInc = 100*Incr/Open)
The monthly data is grouped by stock and the observations that meet the conditions are filtered out. Using the arrange function, they are placed in either descending or ascending order. The default is ascending. The topN variable is used to input how many of the top results to view.
topN <- as.integer(5)
#stocks and their biggest increase in a month
monthData %>% group_by(Stock) %>% filter(PercInc == max(PercInc)) %>%
arrange(desc(PercInc)) %>% top_n(topN, PercInc)
## # A tibble: 6 x 8
## # Groups: Stock [6]
## Year Month Stock Close Open TotVolume Incr PercInc
## <dbl> <dbl> <chr> <dbl> <dbl> <int> <dbl> <dbl>
## 1 2020 4 SHOP 632. 403. 87066900 229. 56.9
## 2 2020 5 LULU 300. 219. 38631100 81.0 37.0
## 3 2020 4 FRCOY 47.7 38.0 618000 9.70 25.5
## 4 2019 8 TGT 107. 86.2 181561800 20.8 24.2
## 5 2017 11 COST 184. 162. 57900700 22.5 13.9
## 6 2017 10 WMT 87.3 77.9 198775100 9.41 12.1
#stocks and their highest volume in a month
monthData %>% group_by(Stock) %>% filter(TotVolume == max(TotVolume)) %>%
arrange(desc(TotVolume)) %>% top_n(topN, TotVolume)
## # A tibble: 6 x 8
## # Groups: Stock [6]
## Year Month Stock Close Open TotVolume Incr PercInc
## <dbl> <dbl> <chr> <dbl> <dbl> <int> <dbl> <dbl>
## 1 2015 10 WMT 57.2 64.8 347169000 -7.52 -11.6
## 2 2017 3 TGT 55.2 58.8 206344800 -3.59 -6.11
## 3 2020 3 COST 285. 294. 125473600 -9.31 -3.16
## 4 2017 10 SHOP 99.5 117 119831700 -17.5 -15.0
## 5 2015 9 LULU 50.7 62.8 111614500 -12.2 -19.4
## 6 2018 9 FRCOY 50.6 46.9 7288600 3.75 8.00
monthData %>% group_by(Stock) %>% filter(TotVolume == min(TotVolume)) %>%
arrange(TotVolume) %>% top_n(topN, TotVolume)
## # A tibble: 6 x 8
## # Groups: Stock [6]
## Year Month Stock Close Open TotVolume Incr PercInc
## <dbl> <dbl> <chr> <dbl> <dbl> <int> <dbl> <dbl>
## 1 2020 7 FRCOY 55.6 57.5 71000 -1.83 -3.18
## 2 2015 7 SHOP 37.4 32.4 3971500 5.02 15.5
## 3 2015 7 COST 145. 145. 14499900 0.720 0.498
## 4 2015 7 LULU 62.9 62.9 16428500 -0.0400 -0.0636
## 5 2020 7 TGT 122. 120. 35874000 1.76 1.47
## 6 2015 7 WMT 72.0 73.3 60954800 -1.37 -1.87
We are interested in seeing which stocks deviated (monthly volume higher than five year average) from the five year average monthly volume. We also wanna see the difference in stock value for these specific stocks
topN <- as.integer(12)
comDF %>% mutate(DiffPrice = Close - Open) %>% mutate(DiffPricePerc = 100*DiffPrice/Open) %>%
group_by(Stock) %>% mutate(AverageVolume = mean(Volume)) %>%
mutate(DiffVolume = Volume - AverageVolume) %>% mutate(DiffVolumePerc = 100*DiffVolume/AverageVolume) %>%
arrange(desc(DiffVolume)) %>% top_n(topN, DiffVolume)
## # A tibble: 72 x 14
## # Groups: Stock [6]
## Date Open High Low Close `Adj Close` Volume Stock Stck
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <chr> <fct>
## 1 2015-10-14 66.6 67.9 60.0 60.0 53.2 8.09e7 WMT WMT
## 2 2017-06-16 73.9 75.5 73.3 75.2 70.2 5.62e7 WMT WMT
## 3 2017-03-30 52.0 52.9 50.5 50.8 50.8 4.96e7 LULU LULU
## 4 2017-06-16 51.4 52.6 48.6 52.6 47.8 4.95e7 TGT TGT
## 5 2018-02-20 97.0 97.9 94.1 94.1 89.3 5.21e7 WMT WMT
## 6 2018-02-21 95.1 95.3 91.2 91.5 86.9 4.95e7 WMT WMT
## 7 2017-02-28 66.9 66.9 57.3 58.8 52.8 4.53e7 TGT TGT
## 8 2015-10-15 59.7 60.5 58.6 59.3 52.6 4.63e7 WMT WMT
## 9 2019-08-21 99.9 103. 98.1 103 101. 4.14e7 TGT TGT
## 10 2018-08-16 100. 100. 97.9 98.6 95.3 4.26e7 WMT WMT
## # … with 62 more rows, and 5 more variables: DiffPrice <dbl>,
## # DiffPricePerc <dbl>, AverageVolume <dbl>, DiffVolume <dbl>,
## # DiffVolumePerc <dbl>
In this step I am creating a candle stick plot with an overlay of the moving average. I am also plotting beneath it a volume plot over the same time frame. 1. The first step is to create a moving average formula using the stats::filter function. The parameters are the vector of interest and the days over which to average. 2. The next step is to create the levers that we wish to be changed when viewing the plot. The chosen levers are the start date and end date, as well as the number of days for the moving average 3. The candlestick plot is rendered using plot_ly function from the plotly package. Whenever there is an increase between two preceeding dates, the plot colour is green, and the plot colour is blue for decreases. The add_lines function is used to add an overlay of the moving average line. The beginning and end axis are stipulated using the levers.
4. The volume plot is a plot of bar plot of the monthly volume versus the dates. 5. The plots are combined using subplot1 function. The heights parameter determines the fractio of space taken up by each plot.
#1 moving average formula
mAverage <- function(x, n){stats::filter(x, rep(1 / n, n), sides = 2)}
#2 levers
StartDate <- "2017-02-26"
endDate <- "2019-02-25"
dayNum <-30
#3 candlestick plot
pricePlot <- uniqloDF %>% plot_ly(x = ~Date, type="candlestick",
open = ~Open, close = ~Close,
high = ~High, low = ~Low,
increasing = list(line = list(color = 'green')),
decreasing=list(line = list(color = 'blue')) )%>%
layout(title = "Uniqlo Candlestick Chart",
xaxis = list(range = c(StartDate, endDate))) %>%
add_lines(y = mAverage(uniqloDF$Close, dayNum), x = uniqloDF$Date, name = 'moving average', color = I('red'))
#4 volume plot
volPlot <- uniqloDF %>% plot_ly(x = ~Date, y = ~Volume, name = 'Volume', type = 'bar') %>%
layout(yaxis = list(title = "Volume"))
# combined plot
subplot(pricePlot, volPlot, heights = c(0.5,0.3), nrows=2, shareX = TRUE, titleY = TRUE) %>%
layout(legend = list(orientation = 'h', x = 0.7, y = 1, bgcolor = 'transparent',
xanchor = 'center', font = list(size = 12)))
The process in 7.1 is repeated but it is placed in a function and there is added functionlity to the moving average function.
An ifelse statement is used to toggle between the two possible states of the moving average function. Setting k=1, results in a assymetrical moving function and setting k=2 results in symmetrical moving function. But to make up for any other number being place in the formula, the symmetrical option is used for k is not equal to 1.
### Stock candlestick chat plotting function ####
stockPlot <- function(df, startDate, endDate, dayNum, k=2) {
#moving average function
ifelse(k==1, mAverage <- function(x, n){stats::filter(x, rep(1 / n, n), sides = 1)}, mAverage <- function(x, n){stats::filter(x, rep(1 / n, n), sides = 2)} )
#candlestick plot
pricePlot <- df %>% plot_ly(x = ~Date, type="candlestick",
open = ~Open, close = ~Close,
high = ~High, low = ~Low,
increasing = list(line = list(color = 'green')),
decreasing=list(line = list(color = 'blue')) )%>%
layout(title = "Candlestick Chart",
xaxis = list(range = c(startDate, endDate))) %>%
add_lines(y = mAverage(df$Close, dayNum), x = df$Date, name = 'moving average', color = I('red'))
#volume plot
volPlot <- df %>% plot_ly(x = ~Date, y = ~Volume, name = 'Volume', type = 'bar') %>%
layout(yaxis = list(title = "Volume"))
#combined plot
subplot(pricePlot, volPlot, heights = c(0.5,0.3), nrows=2, shareX = TRUE, titleY = TRUE) %>%
layout(legend = list(orientation = 'h', x = 0.7, y = 1, bgcolor = 'transparent',
xanchor = 'center', font = list(size = 12)))
}
#test function
stockPlot(luluDF, "2016-02-26", "2019-02-25", 30)
works for tibble, plotly and ggplot2 objects.↩